{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "2f8ab1ed",
   "metadata": {},
   "source": [
    "# Build Regression Coefficients"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f269af41",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import statsmodels.formula.api as sm\n",
    "import datetime\n",
    "import helper_functions as fcn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "73f531f9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the annotated strike table (python parser engine, 'unicode_escape' decoding).\n",
    "data = pd.read_csv(\n",
    "    '../data_general/drones_combined_notated.csv',\n",
    "    engine='python',\n",
    "    encoding='unicode_escape',\n",
    ")\n",
    "# Keep only the rows that have a non-null `time_start`.\n",
    "data = data.loc[data['time_start'].notna()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d275de84",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Strike locations table, restricted to the four columns used in this notebook.\n",
    "strike_locations = pd.read_csv('../data_si/drones_combined_clean.csv')\n",
    "strike_locations = strike_locations[['District', 'Governate', 'Date', 'Latitude']]\n",
    "# Latitude rounded to 3 decimals; it is one of the keys of the later merge into `data`.\n",
    "strike_locations['latitude_2'] = strike_locations['Latitude'].round(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c68500b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# District ids table: the first CSV column becomes the index, which is then cast to int\n",
    "# (the index is later matched against `new_id` in `data`).\n",
    "strike_districts = pd.read_csv('../data_si/drones_district_ids.csv', index_col=0)\n",
    "strike_districts = strike_districts.set_axis(strike_districts.index.astype(int), axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a633da32",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Population projection table: column names come from the second line of the file,\n",
    "# the last row is dropped (presumably a totals/footer row -- TODO confirm) and the\n",
    "# district P-code becomes the index.\n",
    "population = (\n",
    "    pd.read_csv('../data_si/cso_2016_population_projection_sexage_disagreggated.csv', header=[1])\n",
    "    .iloc[:-1]\n",
    "    .set_index('District P-Code')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5c30b639",
   "metadata": {},
   "outputs": [],
   "source": [
    "# add time of day of strikes as binary variable\n",
    "# morning: start before 08:00; day: 08:00-16:00 inclusive; evening: start after 16:00\n",
    "morning_cutoff = datetime.time(8, 0)\n",
    "day_cutoff = datetime.time(16, 0)\n",
    "for period in ('morning', 'day', 'evening'):\n",
    "    data[period] = 0\n",
    "\n",
    "for idx, row in data.iterrows():\n",
    "    start_time, end_time = fcn.get_start_end_times_corrected(row)\n",
    "    # Only the clock time of the corrected start matters for the classification.\n",
    "    strike_time = start_time.time()\n",
    "\n",
    "    if strike_time < morning_cutoff:\n",
    "        data.loc[idx, 'morning'] = 1\n",
    "    elif morning_cutoff <= strike_time <= day_cutoff:\n",
    "        data.loc[idx, 'day'] = 1\n",
    "    elif strike_time > day_cutoff:\n",
    "        data.loc[idx, 'evening'] = 1\n",
    "    else:\n",
    "        print('Error: Time of day not added for %d' %idx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ae7b913",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1 when `rank_militants` equals 'high level' (case-insensitive), 0 otherwise;\n",
    "# missing ranks compare unequal and therefore map to 0.\n",
    "is_high_level = data['rank_militants'].str.lower().eq('high level')\n",
    "data['rank_militants_binary'] = is_high_level.astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dbac4f29",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Join keys on the `data` side: a copy of the parsed date and the latitude rounded to 3 decimals.\n",
    "data['parsed_date_2'] = data['parsed_date']\n",
    "data['latitude_2'] = data['latitude'].round(3)\n",
    "\n",
    "# Left join: every row of `data` is kept and the matching `strike_locations` columns are attached.\n",
    "data = pd.merge(\n",
    "    data,\n",
    "    strike_locations,\n",
    "    how='left',\n",
    "    left_on=['parsed_date_2', 'latitude_2'],\n",
    "    right_on=['Date', 'latitude_2'],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "57ac6dcc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Corrected start time of every strike, keyed by the row label of `data`.\n",
    "stime = {}\n",
    "for idx, row in data.iterrows():\n",
    "    # The helper returns (start, end); only the start is stored here.\n",
    "    stime[idx] = fcn.get_start_end_times_corrected(row)[0]\n",
    "data['stime'] = pd.Series(stime)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "046f4d06",
   "metadata": {},
   "outputs": [],
   "source": [
    "# For every strike, count the strikes in the same district whose start time lies strictly\n",
    "# inside the 30 days before it (both ends of the window are exclusive).\n",
    "lookback = pd.Timedelta('30 days')\n",
    "data['past_month'] = np.nan\n",
    "for idx, row in data.iterrows():\n",
    "    window_start = row['stime'] - lookback\n",
    "    in_window = (data['stime'] > window_start) & (data['stime'] < row['stime'])\n",
    "    same_district = data.loc[in_window, 'District'] == row['District']\n",
    "    data.loc[idx, 'past_month'] = same_district.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a26726d7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach the columns of `strike_districts`, matching `new_id` against its index.\n",
    "data = data.merge(strike_districts, how='left', left_on='new_id', right_index=True)\n",
    "\n",
    "# District population as column `pop`: thousands separators are stripped from TOTAL\n",
    "# before the cast to int; rows are matched on `district_id`.\n",
    "district_pop = population['TOTAL'].str.replace(',', '').astype(int).to_frame('pop')\n",
    "data = data.merge(district_pop, how='left', left_on='district_id', right_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bc7fb332",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Where `militants_killed_high` is missing, fall back to total minus civilian deaths.\n",
    "implied_militants = data['total_killed_high'] - data['civilians_killed_high']\n",
    "data['militants_killed_high'] = data['militants_killed_high'].fillna(value=implied_militants)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "957ea2a5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Re-index the strikes by `new_id`, with the index labels cast to int.\n",
    "data = data.set_index('new_id')\n",
    "data = data.set_axis(data.index.astype(int), axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9ba74f47",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): `cascades` is not assigned by any cell of this notebook, so on a fresh kernel\n",
    "# this cell raises NameError -- TODO confirm where it is meant to be built or loaded.\n",
    "# Concatenate the cascade tables column-wise, keep the 'G1' entries of column level 1 and\n",
    "# take the row labelled 6; the assignment aligns those values with the index of `data`.\n",
    "cascade_table = pd.concat(cascades, axis=1)\n",
    "g1_by_strike = cascade_table.xs('G1', axis=1, level=1)\n",
    "data['incr_G1_1hr'] = g1_by_strike.loc[6]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5d6bfc56",
   "metadata": {},
   "outputs": [],
   "source": [
    "# table 1B\n",
    "# OLS of `reloc` on casualty counts, militant rank, time-of-day dummies, recent strikes in the\n",
    "# district and district population, fitted with the HC0 robust covariance estimator.\n",
    "outcome = 'reloc'\n",
    "covariates = (' ~ civilians_killed_high + militants_killed_high + rank_militants_binary'\n",
    "              + '+ morning + evening + past_month + pop')\n",
    "model = sm.ols(formula=outcome + covariates, data=data)\n",
    "r = model.fit(cov_type='HC0')\n",
    "r.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ff940663",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "drones",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
